An Efficient Crop Recommendation using Machine Learning Techniques

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Importing required libraries
library(readr)
library(tidyverse)
library(tidymodels)
library(ggplot2)
library(dplyr)
library(caret)
library(e1071)
library(rpart)

# Load the soil-measurement data set. The file is read with "," as the field
# separator, so read.csv() is the matching reader; read.csv2() is intended for
# ";"-separated files that use "," as the decimal mark.
crop <- read.csv("Cropdata.csv", header = TRUE)
# View() opens a GUI data viewer, so only call it in an interactive session
# (it is not useful, and may fail, when the document is knitted).
if (interactive()) {
  View(crop)
}
# Show the column types and the first few values of each column.
str(crop)
## 'data.frame':    902 obs. of  7 variables:
##  $ Time.line: Factor w/ 5 levels "2014-2015","2015-2016",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ PH       : num  7.86 7.71 8.01 7.83 8.11 7.53 8.11 7.3 7.69 7.53 ...
##  $ EC       : num  0.931 0.694 1.11 1.09 1.14 1.02 1.14 1.02 0.921 1.02 ...
##  $ N        : num  168 100 91 100 82 ...
##  $ P        : num  17 19.9 16 19.9 17 19.9 17 19.9 17 19.9 ...
##  $ k        : num  32 43.6 99 43.6 102 43.6 1.2 43.6 36 43.6 ...
##  $ Total    : num  226 172 215 173 210 ...
# Preview the first rows of the raw data.
head(crop)
##   Time.line   PH    EC     N    P     k   Total
## 1 2014-2015 7.86 0.931 168.0 17.0  32.0 225.791
## 2 2014-2015 7.71 0.694 100.1 19.9  43.6 172.004
## 3 2014-2015 8.01 1.110  91.0 16.0  99.0 215.120
## 4 2014-2015 7.83 1.090 100.1 19.9  43.6 172.520
## 5 2014-2015 8.11 1.140  82.0 17.0 102.0 210.250
## 6 2014-2015 7.53 1.020 100.1 19.9  43.6 172.150
# Per-column summary (level counts for Time.line, quantiles for the numeric columns).
summary(crop)
##      Time.line         PH                EC                 N          
##  2014-2015:219   Min.   :   0.36   Min.   :  0.0090   Min.   :   1.81  
##  2015-2016:154   1st Qu.:   7.72   1st Qu.:  0.8152   1st Qu.: 100.10  
##  2017-2018:224   Median :   7.96   Median :  1.0050   Median : 152.00  
##  2018-2019:114   Mean   :  18.70   Mean   :  7.2213   Mean   : 139.50  
##  2019-2020:191   3rd Qu.:   8.10   3rd Qu.:  1.0900   3rd Qu.: 169.00  
##                  Max.   :7388.00   Max.   :952.0000   Max.   :1725.00  
##        P                 k              Total       
##  Min.   :   0.13   Min.   :  0.20   Min.   :  51.9  
##  1st Qu.:  14.00   1st Qu.: 42.00   1st Qu.: 172.7  
##  Median :  17.00   Median : 43.60   Median : 224.0  
##  Mean   :  26.37   Mean   : 57.85   Mean   : 249.4  
##  3rd Qu.:  19.90   3rd Qu.: 81.00   3rd Qu.: 282.4  
##  Max.   :1282.00   Max.   :641.00   Max.   :7552.6
# Round Total to whole numbers so it can be binned on integer boundaries.
crop$Total <- round(crop$Total, 0)

#**************************************************Step_1*******************************************
# Step 1: derive the Crop_Type label from the rounded Total.
# Bins (inclusive): 1-200 Ground Nut, 201-214 Sugar Cane, 215-235 Grape,
# 236-244 Onion, 245-250 Banana, 251 and above Turmeric.
# Conditions are checked top to bottom and the first match wins, so each
# upper bound implicitly starts where the previous bin ended.
# Totals below 1 and missing totals stay NA. The top bin is open-ended, so
# very large totals are labelled Turmeric instead of falling outside a
# hard-coded upper limit.
crop_new <- mutate(
  crop,
  Crop_Type = case_when(
    Total < 1 ~ NA_character_,
    Total <= 200 ~ "Ground Nut",
    Total <= 214 ~ "Sugar Cane",
    Total <= 235 ~ "Grape",
    Total <= 244 ~ "Onion",
    Total <= 250 ~ "Banana",
    Total > 250 ~ "Turmeric"
  )
)



# Persist the labelled data set as comma-separated text, then open it in the
# data viewer.
write.table(x = crop_new, file = "crop_new.csv", row.names = FALSE, sep = ",")
View(crop_new)

Data Preparation

# Fix the RNG state so the random train/test assignment is reproducible
# from run to run.
set.seed(7)
# Draw a group id for every row: 1 with probability 0.7, 2 with probability
# 0.3. The draws are independent, so the split is only approximately 70/30.
sample_set <- sample(2, nrow(crop_new), replace = TRUE, prob = c(0.7, 0.3))
# Rows drawn into group 1 form the training partition.
train <- crop_new[sample_set == 1, ]
head(train)
##    Time.line   PH    EC     N    P    k Total  Crop_Type
## 1  2014-2015 7.86 0.931 168.0 17.0 32.0   226      Grape
## 2  2014-2015 7.71 0.694 100.1 19.9 43.6   172 Ground Nut
## 3  2014-2015 8.01 1.110  91.0 16.0 99.0   215      Grape
## 8  2014-2015 7.30 1.020 100.1 19.9 43.6   172 Ground Nut
## 10 2014-2015 7.53 1.020 100.1 19.9 43.6   172 Ground Nut
## 11 2014-2015 8.06 1.040  83.0 16.0 94.0   202 Sugar Cane
# Write the training partition to disk as a CSV file.
write.table(x = train, file = "crop_train.csv", row.names = FALSE, sep = ",")



# Rows drawn into group 2 form the held-out test partition.
test <- crop_new[sample_set == 2, , drop = FALSE]
head(test)
##    Time.line   PH    EC     N    P     k Total  Crop_Type
## 4  2014-2015 7.83 1.090 100.1 19.9  43.6   173 Ground Nut
## 5  2014-2015 8.11 1.140  82.0 17.0 102.0   210 Sugar Cane
## 6  2014-2015 7.53 1.020 100.1 19.9  43.6   172 Ground Nut
## 7  2014-2015 8.11 1.140  82.0 17.0   1.2   109 Ground Nut
## 9  2014-2015 7.69 0.921  76.0 17.0  36.0   138 Ground Nut
## 14 2014-2015 8.12 1.400  81.0 17.0 102.0   210 Sugar Cane
# Save the held-out test partition as CSV. This must write `test`, not the
# full `crop_new` table, otherwise test.csv also contains the training rows.
write.table(test, file = "test.csv",
            sep = ",",
            row.names = FALSE)

Data Cleaning

library(DataExplorer)
# Count the missing cells in each partition, then draw the missing-value
# profile of the training set.
train %>% is.na() %>% sum()
## [1] 0
test %>% is.na() %>% sum()
## [1] 0
plot_missing(train)

# Exploratory Data Analysis (EDA): describe() computes summary statistics for each variable

library(Hmisc)
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
## 
##     cluster
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:e1071':
## 
##     impute
## The following object is masked from 'package:parsnip':
## 
##     translate
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Variable-by-variable description of the training partition
# (namespace-qualified so it is clear which package's describe() is used).
Hmisc::describe(train)
## train 
## 
##  8  Variables      644  Observations
## --------------------------------------------------------------------------------
## Time.line 
##        n  missing distinct 
##      644        0        5 
## 
## lowest : 2014-2015 2015-2016 2017-2018 2018-2019 2019-2020
## highest: 2014-2015 2015-2016 2017-2018 2018-2019 2019-2020
##                                                             
## Value      2014-2015 2015-2016 2017-2018 2018-2019 2019-2020
## Frequency        147       111       162        87       137
## Proportion     0.228     0.172     0.252     0.135     0.213
## --------------------------------------------------------------------------------
## PH 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      644        0      163    0.999    9.213    3.166    7.301    7.546 
##      .25      .50      .75      .90      .95 
##    7.720    7.960    8.100    8.210    8.680 
## 
## lowest :   0.81000   1.12000   1.15000   2.06000   2.72000
## highest:  10.31000  11.26000  18.60000  18.70026 822.00000
##                                   
## Value          0    10    20   820
## Frequency      6   633     4     1
## Proportion 0.009 0.983 0.006 0.002
## 
## For the frequency table, variable is rounded to the nearest 10
## --------------------------------------------------------------------------------
## EC 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      644        0      229    0.999    6.099    10.68   0.1415   0.3130 
##      .25      .50      .75      .90      .95 
##   0.8108   0.9970   1.0800   1.2900   2.1055 
## 
## lowest :   0.009   0.040   0.050   0.060   0.070
## highest: 151.000 168.000 691.000 921.000 951.000
##                                                                       
## Value          0    10    20    80   120   150   170   690   920   950
## Frequency    623    12     1     1     1     2     1     1     1     1
## Proportion 0.967 0.019 0.002 0.002 0.002 0.003 0.002 0.002 0.002 0.002
## 
## For the frequency table, variable is rounded to the nearest 10
## --------------------------------------------------------------------------------
## N 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      644        0       77    0.942    138.5    40.58    100.1    100.1 
##      .25      .50      .75      .90      .95 
##    100.1    152.0    169.0    181.0    189.0 
## 
## lowest :   1.81   7.87   8.00  15.00  17.00, highest: 195.00 196.00 197.00 198.00 199.00
## --------------------------------------------------------------------------------
## P 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      644        0       39    0.963    27.36    25.07      9.0     11.0 
##      .25      .50      .75      .90      .95 
##     14.0     17.0     19.9     19.9    151.6 
## 
## lowest :    5    7    8    9   10, highest:  171  172  173  178 1282
## --------------------------------------------------------------------------------
## k 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      644        0       99    0.966    58.13    32.09    14.15    28.00 
##      .25      .50      .75      .90      .95 
##    42.00    43.60    81.00    95.00    99.00 
## 
## lowest :   0.20   0.87   1.00   4.00   6.00, highest: 130.00 144.00 146.00 160.00 641.00
## --------------------------------------------------------------------------------
## Total 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      644        0      154    0.993      239    74.85      172      172 
##      .25      .50      .75      .90      .95 
##      173      226      284      297      309 
## 
## lowest :   52   78  106  113  116, highest:  908 1079 1122 1152 1397
## --------------------------------------------------------------------------------
## Crop_Type 
##        n  missing distinct 
##      644        0        6 
## 
## lowest : Banana     Grape      Ground Nut Onion      Sugar Cane
## highest: Grape      Ground Nut Onion      Sugar Cane Turmeric  
##                                                                             
## Value          Banana      Grape Ground Nut      Onion Sugar Cane   Turmeric
## Frequency          14         74        232         17         42        265
## Proportion      0.022      0.115      0.360      0.026      0.065      0.411
## --------------------------------------------------------------------------------
# Variable-by-variable description of the test partition
# (namespace-qualified so it is clear which package's describe() is used).
Hmisc::describe(test)
## test 
## 
##  8  Variables      258  Observations
## --------------------------------------------------------------------------------
## Time.line 
##        n  missing distinct 
##      258        0        5 
## 
## lowest : 2014-2015 2015-2016 2017-2018 2018-2019 2019-2020
## highest: 2014-2015 2015-2016 2017-2018 2018-2019 2019-2020
##                                                             
## Value      2014-2015 2015-2016 2017-2018 2018-2019 2019-2020
## Frequency         72        43        62        27        54
## Proportion     0.279     0.167     0.240     0.105     0.209
## --------------------------------------------------------------------------------
## PH 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      258        0       90    0.999    42.38     69.4    7.319    7.630 
##      .25      .50      .75      .90      .95 
##    7.740    7.950    8.100    8.190    8.610 
## 
## lowest :    0.36    0.46    3.83    6.00    6.03
## highest:   10.57   18.60  765.00  768.00 7388.00
##                                   
## Value          0    20   760  7380
## Frequency    253     2     2     1
## Proportion 0.981 0.008 0.008 0.004
## 
## For the frequency table, variable is rounded to the nearest 20
## --------------------------------------------------------------------------------
## EC 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      258        0      118    0.999    10.02    18.37   0.2332   0.4650 
##      .25      .50      .75      .90      .95 
##   0.8310   1.0100   1.0900   1.2800   1.7100 
## 
## lowest :   0.07   0.08   0.09   0.12   0.14, highest: 101.00 124.00 248.00 898.00 952.00
##                                                     
## Value          0    10   100   120   250   900   950
## Frequency    250     3     1     1     1     1     1
## Proportion 0.969 0.012 0.004 0.004 0.004 0.004 0.004
## 
## For the frequency table, variable is rounded to the nearest 10
## --------------------------------------------------------------------------------
## N 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      258        0       61     0.94      142    52.89    99.94   100.10 
##      .25      .50      .75      .90      .95 
##   100.10   148.00   168.75   178.00   186.15 
## 
## lowest :   15.4   19.0   76.0   81.0   82.0, highest:  196.0  198.0  199.0  275.0 1725.0
## --------------------------------------------------------------------------------
## P 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      258        0       28    0.957    23.91    18.58      9.0     11.0 
##      .25      .50      .75      .90      .95 
##     14.0     17.0     19.9     19.9    131.7 
## 
## lowest :   0.13   1.00   5.00   7.00   8.00, highest: 165.00 168.00 171.00 172.00 178.00
## --------------------------------------------------------------------------------
## k 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      258        0       64    0.961    57.13    30.83     17.0     29.0 
##      .25      .50      .75      .90      .95 
##     41.0     43.6     78.0     96.0    105.7 
## 
## lowest :   1.2  12.0  13.0  14.0  17.0, highest: 122.0 128.0 146.0 175.0 196.0
## --------------------------------------------------------------------------------
## Total 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      258        0      108    0.991    275.4    155.5    172.0    172.0 
##      .25      .50      .75      .90      .95 
##    173.0    221.0    279.8    298.6    313.4 
## 
## lowest :   98  106  109  138  142, highest: 1045 1100 1112 1857 7553
##                                                                             
## Value        100   200   300   400   500   600   900  1000  1100  1900  7600
## Frequency      6   155    86     2     1     2     1     1     2     1     1
## Proportion 0.023 0.601 0.333 0.008 0.004 0.008 0.004 0.004 0.008 0.004 0.004
## 
## For the frequency table, variable is rounded to the nearest 100
## --------------------------------------------------------------------------------
## Crop_Type 
##        n  missing distinct 
##      258        0        6 
## 
## lowest : Banana     Grape      Ground Nut Onion      Sugar Cane
## highest: Grape      Ground Nut Onion      Sugar Cane Turmeric  
##                                                                             
## Value          Banana      Grape Ground Nut      Onion Sugar Cane   Turmeric
## Frequency           3         29         99          9         21         97
## Proportion      0.012      0.112      0.384      0.035      0.081      0.376
## --------------------------------------------------------------------------------
  • Two continuous variables
  • Taking PH & EC
library(ggplot2)

# log(PH) per Time.line period, with points coloured by the derived crop
# label. The y-axis label states that the log of PH is plotted, and the title
# covers the periods actually present in the data (2014-2015 to 2019-2020).
q <- ggplot(data = train, aes(x = Time.line, y = log(PH))) +
  geom_line(colour = "darkgreen") +
  # Large coloured points carry the crop label...
  geom_point(aes(colour = factor(Crop_Type)), size = 3) +
  # ...and a smaller light-grey point is drawn on top of each one.
  geom_point(colour = "grey90", size = 1.5) +
  labs(
    title = "Crop according to PH for Time.line 2014-2020",
    y = "log(PH) of the soil",
    x = "Time.line"
  )
q

library(plotly)
 
# Animated scatter (one frame per Time.line period) of PH against log(P);
# marker size follows k, colour follows the crop label, and the hover text
# shows the raw P value.
# PH is passed untransformed because the x-axis itself is set to a log scale
# below. Feeding log(PH) into a log axis would apply the logarithm twice, and
# rows with PH < 1 (negative log) cannot be placed on a log axis.
fig <- train %>%
  plot_ly(
    x = ~PH,
    y = ~log(P),
    size = ~k,
    color = ~Crop_Type,
    frame = ~Time.line,
    text = ~P,
    hoverinfo = "text",
    type = "scatter",
    mode = "markers"
  )

# Log-scaled x-axis, ticked in the original PH units.
fig <- fig %>% layout(
  xaxis = list(
    type = "log"
  )
)

fig
# Scatter of log(PH) against the derived crop label.
# Axis titles describe what is actually plotted: the y-axis shows Crop_Type
# (not Time.line) and the x-axis shows log(PH). The title covers the periods
# present in the data (2014-2015 to 2019-2020).
plot_ly(train, x = ~log(PH), y = ~Crop_Type,
        type = "scatter",
        mode = "markers",
        marker = list(color = "darkgreen"), opacity = 0.5) %>%
  layout(title = "Crop according to PH for Time.line 2014-2020",
         yaxis = list(title = "Crop type"),
         xaxis = list(title = "log(PH) of the soil"))

Boosting Algorithms

# Store the outcome column as a factor before any model is fitted.
train[["Crop_Type"]] <- as.factor(train[["Crop_Type"]])
library(mlbench)
library(caret)

# Settings shared by the caret::train() calls in this file:
# the selection metric, the RNG seed, and 10-fold cross-validation repeated
# 3 times.
metric <- "Accuracy"
seed <- 7
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

Modelling

SvmRadial

# Support vector machine with a radial-basis kernel, tuned by caret with the
# shared resampling control and selection metric.
# NOTE(review): `Crop_Type ~ .` keeps Total as a predictor, and Crop_Type was
# derived from ranges of Total earlier in this file -- possible target
# leakage; confirm this is intended.
set.seed(seed)
fit.svmRadial <- train(
  Crop_Type ~ .,
  data = train,
  method = "svmRadial",
  metric = metric,
  trControl = control
)
fit.svmRadial
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 644 samples
##   7 predictor
##   6 classes: 'Banana', 'Grape', 'Ground Nut', 'Onion', 'Sugar Cane', 'Turmeric' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 580, 578, 579, 580, 578, 580, ... 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.8330574  0.7470879
##   0.50  0.8438765  0.7661621
##   1.00  0.8526598  0.7812337
## 
## Tuning parameter 'sigma' was held constant at a value of 0.195314
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.195314 and C = 1.

Stochastic Gradient Boosting

# Stochastic gradient boosting (gbm), tuned by caret with the shared
# resampling control; verbose = FALSE silences gbm's per-iteration log.
# NOTE(review): `Crop_Type ~ .` keeps Total as a predictor, and Crop_Type was
# derived from ranges of Total earlier in this file -- possible target
# leakage; confirm this is intended.
set.seed(seed)
fit.gbm <- train(
  Crop_Type ~ .,
  data = train,
  method = "gbm",
  metric = metric,
  trControl = control,
  verbose = FALSE
)
fit.gbm
## Stochastic Gradient Boosting 
## 
## 644 samples
##   7 predictor
##   6 classes: 'Banana', 'Grape', 'Ground Nut', 'Onion', 'Sugar Cane', 'Turmeric' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 580, 578, 579, 580, 578, 580, ... 
## Resampling results across tuning parameters:
## 
##   interaction.depth  n.trees  Accuracy   Kappa    
##   1                   50      0.9989663  0.9984885
##   1                  100      0.9989581  0.9984760
##   1                  150      0.9994792  0.9992400
##   2                   50      0.9989663  0.9984900
##   2                  100      0.9968737  0.9954233
##   2                  150      0.9958481  0.9939448
##   3                   50      0.9989501  0.9984689
##   3                  100      0.9958406  0.9939460
##   3                  150      0.9942771  0.9916607
## 
## Tuning parameter 'shrinkage' was held constant at a value of 0.1
## 
## Tuning parameter 'n.minobsinnode' was held constant at a value of 10
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were n.trees = 150, interaction.depth =
##  1, shrinkage = 0.1 and n.minobsinnode = 10.

kNN

# k-nearest neighbours; predictors are centred and scaled before fitting.
# NOTE(review): `Crop_Type ~ .` keeps Total as a predictor, and Crop_Type was
# derived from ranges of Total earlier in this file -- possible target
# leakage; confirm this is intended.
set.seed(seed)
fit.knn <- train(
  Crop_Type ~ .,
  data = train,
  method = "knn",
  metric = metric,
  preProcess = c("center", "scale"),
  trControl = control
)
fit.knn
## k-Nearest Neighbors 
## 
## 644 samples
##   7 predictor
##   6 classes: 'Banana', 'Grape', 'Ground Nut', 'Onion', 'Sugar Cane', 'Turmeric' 
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 580, 578, 579, 580, 578, 580, ... 
## Resampling results across tuning parameters:
## 
##   k  Accuracy   Kappa    
##   5  0.8980746  0.8506664
##   7  0.8914000  0.8408056
##   9  0.8903744  0.8385319
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 5.

Model Selection

summarize results

# Gather the cross-validation resamples of the three fitted models so they
# can be compared side by side.
boosting_results <- resamples(
  list(
    svmRadial = fit.svmRadial,
    gbm = fit.gbm,
    knn = fit.knn
  )
)
boosting_results
## 
## Call:
## resamples.default(x = list(svmRadial = fit.svmRadial, gbm = fit.gbm, knn
##  = fit.knn))
## 
## Models: svmRadial, gbm, knn 
## Number of resamples: 30 
## Performance metrics: Accuracy, Kappa 
## Time estimates for: everything, final model fit
# Distribution of Accuracy and Kappa across the resamples, per model.
summary(boosting_results)
## 
## Call:
## summary.resamples(object = boosting_results)
## 
## Models: svmRadial, gbm, knn 
## Number of resamples: 30 
## 
## Accuracy 
##               Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## svmRadial 0.800000 0.8437500 0.8582589 0.8526598 0.8691349 0.9062500    0
## gbm       0.984375 1.0000000 1.0000000 0.9994792 1.0000000 1.0000000    0
## knn       0.812500 0.8879788 0.9062500 0.8980746 0.9215650 0.9384615    0
## 
## Kappa 
##                Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## svmRadial 0.7066991 0.7696174 0.7843270 0.7812337 0.8013855 0.8601093    0
## gbm       0.9771999 1.0000000 1.0000000 0.9992400 1.0000000 1.0000000    0
## knn       0.7302424 0.8334432 0.8606617 0.8506664 0.8824172 0.9113838    0
# Plot the resampling comparison of the models.
dotplot(boosting_results)

# Bagging Algorithms: Random Forest

# Random forest tuned by caret with the shared resampling control and metric.
# NOTE(review): `Crop_Type ~ .` keeps Total as a predictor, and Crop_Type was
# derived from ranges of Total earlier in this file -- possible target
# leakage; confirm this is intended.
set.seed(seed)
fit.rf <- train(
  Crop_Type ~ .,
  data = train,
  method = "rf",
  metric = metric,
  trControl = control
)
fit.rf
## Random Forest 
## 
## 644 samples
##   7 predictor
##   6 classes: 'Banana', 'Grape', 'Ground Nut', 'Onion', 'Sugar Cane', 'Turmeric' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 580, 578, 579, 580, 578, 580, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##    2    0.9600648  0.9411590
##    6    0.9912337  0.9870981
##   10    0.9958654  0.9939269
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 10.
# # Example of Bagging algorithms
# control <- trainControl(method="repeatedcv", number=10, repeats=3)
# seed <- 7
# metric <- "Accuracy"
# # Bagged CART
# set.seed(seed)
# fit.treebag <- train(Class~., data=dataset, method="treebag", metric=metric, trControl=control)
# # Random Forest
# set.seed(seed)
# fit.rf <- train(Class~., data=dataset, method="rf", metric=metric, trControl=control)
# # summarize results
# bagging_results <- resamples(list(treebag=fit.treebag, rf=fit.rf))
# summary(bagging_results)
# dotplot(bagging_results)
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
# Bagged trees: every predictor is a split candidate at each node, so mtry is
# set to the number of predictor columns (all columns except Crop_Type).
# A larger mtry is invalid and gets reset with a warning.
# The tree count argument is `ntree`; a misspelt `ntrees` is silently ignored.
boston_bag <- randomForest(Crop_Type ~ ., data = train,
                           mtry = ncol(train) - 1,
                           importance = TRUE, ntree = 500)
## Warning in randomForest.default(m, y, ...): invalid mtry: reset to within valid
## range
# Print the fitted model: call, OOB error estimate and confusion matrix.
boston_bag
## 
## Call:
##  randomForest(formula = Crop_Type ~ ., data = train, mtry = 13,      importance = TRUE, ntrees = 500) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 7
## 
##         OOB estimate of  error rate: 0.31%
## Confusion matrix:
##            Banana Grape Ground Nut Onion Sugar Cane Turmeric class.error
## Banana         14     0          0     0          0        0  0.00000000
## Grape           0    74          0     0          0        0  0.00000000
## Ground Nut      0     0        232     0          0        0  0.00000000
## Onion           0     1          0    16          0        0  0.05882353
## Sugar Cane      0     0          1     0         41        0  0.02380952
## Turmeric        0     0          0     0          0      265  0.00000000
# Refit of the bagging model with the same specification as the fit above.
# mtry equals the number of predictor columns (all columns except Crop_Type);
# a larger value is invalid and gets reset with a warning.
# The tree count argument is `ntree`; a misspelt `ntrees` is silently ignored.
boston_bag <- randomForest(Crop_Type ~ ., data = train,
                           mtry = ncol(train) - 1,
                           importance = TRUE, ntree = 500)
## Warning in randomForest.default(m, y, ...): invalid mtry: reset to within valid
## range
# Print the refitted model: call, OOB error estimate and confusion matrix.
boston_bag
## 
## Call:
##  randomForest(formula = Crop_Type ~ ., data = train, mtry = 13,      importance = TRUE, ntrees = 500) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 7
## 
##         OOB estimate of  error rate: 0.31%
## Confusion matrix:
##            Banana Grape Ground Nut Onion Sugar Cane Turmeric class.error
## Banana         14     0          0     0          0        0  0.00000000
## Grape           0    74          0     0          0        0  0.00000000
## Ground Nut      0     0        232     0          0        0  0.00000000
## Onion           0     1          0    16          0        0  0.05882353
## Sugar Cane      0     0          1     0         41        0  0.02380952
## Turmeric        0     0          0     0          0      265  0.00000000
# Random forest proper: only a random subset of the predictors is tried at
# each split. floor(sqrt(p)) is randomForest's default subset size for
# classification, written out here so the contrast with bagging is explicit.
# An mtry above the number of predictors is reset with a warning and makes
# the fit identical to bagging.
# The tree count argument is `ntree`; a misspelt `ntrees` is silently ignored.
boston_forest <- randomForest(Crop_Type ~ ., data = train,
                              mtry = floor(sqrt(ncol(train) - 1)),
                              importance = TRUE, ntree = 500)
## Warning in randomForest.default(m, y, ...): invalid mtry: reset to within valid
## range
# Print the fitted forest: call, OOB error estimate and confusion matrix.
boston_forest
## 
## Call:
##  randomForest(formula = Crop_Type ~ ., data = train, mtry = 17,      importance = TRUE, ntrees = 500) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 7
## 
##         OOB estimate of  error rate: 0.31%
## Confusion matrix:
##            Banana Grape Ground Nut Onion Sugar Cane Turmeric class.error
## Banana         14     0          0     0          0        0  0.00000000
## Grape           0    74          0     0          0        0  0.00000000
## Ground Nut      0     0        232     0          0        0  0.00000000
## Onion           0     1          0    16          0        0  0.05882353
## Sugar Cane      0     0          1     0         41        0  0.02380952
## Turmeric        0     0          0     0          0      265  0.00000000